import requests as req;
import os;
import re;
import time;

def extract_image_urls(html_text):
    """Return the image URLs found in the first "thumb" div of a page.

    Args:
        html_text: HTML source of the article page.

    Returns:
        A list of image URLs in document order. Sources that already start
        with ``http://`` or ``https://`` are returned unchanged; every other
        source gets ``https:`` prepended (presumably the page uses
        protocol-relative ``//host/path`` links -- verify against the site).
        The list is empty when the page has no ``<div class="thumb">`` block.
    """
    # The regexes are tailored to the page's HTML; re.S lets "." span the
    # newlines inside the div.
    thumb_blocks = re.findall('<div class="thumb">(.*?)</div>', html_text, re.S)
    if not thumb_blocks:
        return []

    # Only the first thumb block is inspected.
    sources = re.findall('<img src="(.*?)"', thumb_blocks[0], re.S)
    return [
        src if src.startswith(("http://", "https://")) else "https:" + src
        for src in sources
    ]


def main():
    """Download the images of one hard-coded article into bug_file/images.

    Raises:
        requests.RequestException: if the article page itself cannot be
            fetched. Individual image failures are reported and skipped.
    """
    image_dir = "bug_file/images"
    timeout = 10  # seconds; without a timeout a stalled server blocks forever

    # makedirs creates the missing "bug_file" parent as well and does not
    # fail when the directory already exists.
    os.makedirs(image_dir, exist_ok=True)

    url = "https://www.qiushibaike.com/article/124008351"

    # Spoof a browser User-Agent.
    header = {
        "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
        "Mobile-User-Agent":"Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36"
    }

    # Fetch the whole page; fail loudly on an HTTP error instead of parsing
    # an error document.
    page = req.get(url=url, headers=header, timeout=timeout)
    page.raise_for_status()

    saved = 0
    for src in extract_image_urls(page.text):
        # Download before opening the file so a failed request does not
        # leave an empty file behind.
        try:
            image = req.get(url=src, headers=header, timeout=timeout)
            image.raise_for_status()
        except req.RequestException as err:
            print(f"skipped {src}: {err}")
            continue

        # Files are named after the current timestamp and always get a
        # ".png" extension, whatever format the server returned.
        file_name = str(time.time()) + ".png"
        with open(os.path.join(image_dir, file_name), "wb") as wf:
            wf.write(image.content)
        saved += 1

    print("爬取成功!!", f"本次共爬取图片{saved}张")


if __name__ == '__main__':
    main()